# NOAA weather-service data (pulled through the rnoaa package).
# The data are publicly available.
# Download one year (2017) of daily weather records for three stations
# and attach a readable station name to every record.
weather_df <-
  rnoaa::meteo_pull_monitors(
    c("USW00094728", "USC00519397", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2017-01-01",
    date_max = "2017-12-31"
  ) %>%
  mutate(
    name = recode(
      id,
      USW00094728 = "CentralPark_NY",
      USC00519397 = "Waikiki_HA",
      USS0023B17S = "Waterhole_WA"
    ),
    # temperatures are recorded in tenths of a degree Celsius,
    # so they have to be divided by 10
    tmax = tmax / 10,
    tmin = tmin / 10
  ) %>%
  # put the identifying columns first, keep every other column after them
  select(name, id, everything())
## Registered S3 method overwritten by 'crul':
## method from
## as.character.form_file httr
## Registered S3 method overwritten by 'hoardr':
## method from
## print.cache_info httr
## file path: /Users/macbook/Library/Caches/rnoaa/ghcnd/USW00094728.dly
## file last updated: 2019-09-04 21:33:58
## file min/max dates: 1869-01-01 / 2019-09-30
## file path: /Users/macbook/Library/Caches/rnoaa/ghcnd/USC00519397.dly
## file last updated: 2019-09-04 21:34:09
## file min/max dates: 1965-01-01 / 2019-09-30
## file path: /Users/macbook/Library/Caches/rnoaa/ghcnd/USS0023B17S.dly
## file last updated: 2019-09-04 21:34:13
## file min/max dates: 1999-09-01 / 2019-09-30
# Print the assembled data frame to inspect its columns and first rows.
weather_df
## # A tibble: 1,095 x 6
## name id date prcp tmax tmin
## <chr> <chr> <date> <dbl> <dbl> <dbl>
## 1 CentralPark_NY USW00094728 2017-01-01 0 8.9 4.4
## 2 CentralPark_NY USW00094728 2017-01-02 53 5 2.8
## 3 CentralPark_NY USW00094728 2017-01-03 147 6.1 3.9
## 4 CentralPark_NY USW00094728 2017-01-04 0 11.1 1.1
## 5 CentralPark_NY USW00094728 2017-01-05 0 1.1 -2.7
## 6 CentralPark_NY USW00094728 2017-01-06 13 0.6 -3.8
## 7 CentralPark_NY USW00094728 2017-01-07 81 -3.2 -6.6
## 8 CentralPark_NY USW00094728 2017-01-08 0 -3.8 -8.8
## 9 CentralPark_NY USW00094728 2017-01-09 0 -4.9 -9.9
## 10 CentralPark_NY USW00094728 2017-01-10 0 7.8 -6
## # … with 1,085 more rows
# ggplot() with data and aesthetics only; no geom layer is added here.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax))
# Same aesthetics plus a point layer: a scatterplot of tmax against tmin.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).
# Alternative ways of making this plot (kept for reference, not run).
# By default a plot is printed when it is not assigned to a name.
# Most of the time this style is not used.
#weather_df %>% filter(name == "CentralPark_NY")
#scatterplot = weather_df %>%
#  ggplot(aes(x = tmin, y = tmax)) + geom_point()
#scatterplot
#weather_df %>%
#  ggplot(aes(x = tmin, y = tmax)) +
#  geom_point()
# Store the base plot under a name, then add the point layer when printing.
plot_weather <- ggplot(weather_df, aes(x = tmin, y = tmax))
plot_weather + geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).
# Colour the points by station name.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name))
## Warning: Removed 15 rows containing missing values (geom_point).
# Points coloured by station, with a single smooth curve over all the data.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = 0.5) +
  # se = FALSE drops the grey uncertainty band, which is not very useful here
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).
# Facet by station: one figure with a panel per station,
# instead of sending multiple separate plots.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).
# this is fine, but not very interesting
# color = name is set globally here, so it applies to every layer
weather_df %>%
  ggplot(aes(x = date, y = tmax, color = name)) +
  # size = prcp draws each point as a bubble sized by precipitation amount
  geom_point(aes(size = prcp), alpha = 0.5) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
# alpha sets the transparency level of the points
# (closer to 0 is more see-through, 1 is fully opaque).
# Central Park only, temperatures converted to Fahrenheit,
# with a straight-line (lm) fit on top of the points.
weather_df %>%
  filter(name == "CentralPark_NY") %>%
  mutate(
    tmax_fahr = tmax * (9 / 5) + 32,
    tmin_fahr = tmin * (9 / 5) + 32
  ) %>%
  ggplot(aes(x = tmin_fahr, y = tmax_fahr)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE)
# Smooth curves only (no points): tmax over the year for each station.
weather_df %>%
  ggplot(aes(x = date, y = tmax, color = name)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
# 2d density
# Shows how many data points fall in each region of the plot.
# When the count at each location matters, use colour to show the density.
# geom_bin2d() does the same thing as geom_hex()
# (rectangular bins instead of hexagons).
weather_df %>%
  ggplot(aes(x = tmax, y = tmin)) +
  geom_hex()
## Warning: Removed 15 rows containing non-finite values (stat_binhex).
# Constant colour: set `color` OUTSIDE aes() so every point is drawn in blue.
ggplot(weather_df) +
  geom_point(aes(x = tmax, y = tmin), color = "blue")
## Warning: Removed 15 rows containing missing values (geom_point).
# For comparison: INSIDE aes(), "blue" is treated as a data value that is
# mapped onto the colour scale, so the points get the default palette colour
# and a legend entry labelled "blue" instead of actually being blue.
ggplot(weather_df) +
  geom_point(aes(x = tmax, y = tmin, color = "blue"))
## Warning: Removed 15 rows containing missing values (geom_point).
# Histogram of tmax over all stations together (default number of bins).
weather_df %>%
  ggplot(aes(x = tmax)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing non-finite values (stat_bin).
# use fill (not color) to shade the bars by station
# position = "dodge" puts the bars next to each other for
# side-by-side comparisons
# a histogram does not need a y aesthetic to be defined
weather_df %>%
  ggplot(aes(x = tmax, fill = name)) +
  geom_histogram(binwidth = 2, position = "dodge")
## Warning: Removed 3 rows containing non-finite values (stat_bin).
# Why the three panels did not work: a layer called on its own (a bare
# `geom_hex()`) only prints the layer description; it draws nothing until
# it is added to a ggplot() call.
# Attach the layer to a plot and facet by station to get one
# hex-density panel per station.
ggplot(weather_df, aes(x = tmax, y = tmin)) +
  geom_hex() +
  facet_grid(. ~ name)
# Jeff's favorite kind of histogram: overlaid density curves, one per station
# (adjust < 1 makes the curves less smooth; alpha keeps overlaps visible).
weather_df %>%
  ggplot(aes(x = tmax, fill = name)) +
  geom_density(color = "blue", adjust = 0.5, alpha = 0.4)
## Warning: Removed 3 rows containing non-finite values (stat_density).
# Boxplot of tmax for each station.
weather_df %>%
  ggplot(aes(x = name, y = tmax)) +
  geom_boxplot()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
# Violin plots are occasionally useful, mainly when there are too many
# groups to compare comfortably with other plot types.
ggplot(weather_df, aes(x = name, y = tmax)) +
  geom_violin(aes(fill = name), color = "blue", alpha = .5) +
  # mark each station's median with a large point; `fun` replaces the
  # `fun.y` argument, which is deprecated since ggplot2 3.3.0
  stat_summary(fun = median, geom = "point", color = "blue", size = 4)
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_summary).
# Jeff likes ridge plots.
# Each station's density gets its own row, so every density is easy to read;
# a tall ridge plot helps to spot bimodality and to compare groups (website).
weather_df %>%
  ggplot(aes(x = tmax, y = name)) +
  geom_density_ridges(scale = 0.85)
## Picking joint bandwidth of 1.84
## Warning: Removed 3 rows containing non-finite values (stat_density_ridges).
# Piping makes it easy to build plots from the same dataset over and over.
# se = FALSE: showing standard errors as a confidence band might be confusing.
# Connect ggplot layers with `+`; connect pipeline steps with `%>%`.
#
# Build the ridge plot once, keep it under a name, and save it to a PDF.
# (The identical build-and-save code previously appeared twice and simply
# overwrote the same file.)
ggp_ridge_temp <-
  weather_df %>%
  ggplot(aes(x = tmax, y = name)) +
  geom_density_ridges(scale = .85)
ggsave("ggplot_temp_ridge.pdf", ggp_ridge_temp)
## Saving 12 x 5 in image
## Picking joint bandwidth of 1.84
## Warning: Removed 3 rows containing non-finite values (stat_density_ridges).
# Precipitation: overlaid density curves, filled by station.
weather_df %>%
  ggplot(aes(x = prcp)) +
  geom_density(aes(fill = name), alpha = 0.5)
## Warning: Removed 3 rows containing non-finite values (stat_density).
# Precipitation: one ridge per station.
weather_df %>%
  ggplot(aes(x = prcp, y = name)) +
  geom_density_ridges(scale = 0.85)
## Picking joint bandwidth of 4.61
## Warning: Removed 3 rows containing non-finite values (stat_density_ridges).
# Precipitation: boxplot per station.
weather_df %>%
  ggplot(aes(x = name, y = prcp)) +
  geom_boxplot()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
# Ridge plot again, restricted to days with some precipitation (prcp > 0).
weather_df %>%
  filter(prcp > 0) %>%
  ggplot(aes(x = prcp, y = name)) +
  geom_density_ridges(scale = 0.85)
## Picking joint bandwidth of 19.7
# Keep the tmin/tmax scatterplot under a name and write it to a PDF
# with an explicit width and height.
weather_plot <-
  weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = 0.5)
ggsave(
  filename = "weather_plot.pdf",
  plot = weather_plot,
  width = 8,
  height = 5
)
## Warning: Removed 15 rows containing missing values (geom_point).
# Set knitr chunk options for figure size: width, aspect ratio, and
# the width of the figure in the rendered output.
knitr::opts_chunk$set(out.width = "90%", fig.asp = 0.6, fig.width = 6)
# Scatterplot of tmax against tmin, coloured by station.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name))
## Warning: Removed 15 rows containing missing values (geom_point).
# Note: adding the `echo = FALSE` parameter to a code chunk prevents the R code that generated the plot from being printed.